# Libraries
library(tidyverse)
library(readr)
library(kableExtra)
library(bookdown)
library(tinytex)
library(rgdal)
library(broom)
library(sf)
library(gganimate)
library(transformr)

1 Analysis

# read data
daily_caloric_supply <- read_csv("Data/daily-caloric-supply-derived-from-carbohydrates-protein-and-fat.csv")
dietary_compositions <- read_csv("Data/dietary-compositions-by-commodity-group.csv")
overweight_calories <- read_csv("Data/share-of-adult-men-overweight-or-obese-vs-daily-supply-of-calories.csv")
# data wrangling
daily_caloric_supply <- daily_caloric_supply %>%
  rename_all(str_remove, pattern = "\\(FAO.+\\)") %>%
  select(-Code)

dietary_compositions <- dietary_compositions %>%
  rename_all(str_remove, pattern = "\\(FAO.+\\)") %>%
  select(-Code)
# data figure1
figure1 <- dietary_compositions %>% 
  filter(Entity == 'Australia') %>% 
  pivot_longer(cols = -c(Entity,Year),
               names_to = 'Variable',
               values_to = 'Value') %>% 
  ggplot(aes(x = Year,
             y = Value,
             fill = Variable)) +
  geom_area(color = 'white') +
  scale_fill_viridis_d() +
  labs(y = 'Kilocalories per Person per Day',
       title = 'Kilocalories per Person per Day in Australia') +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = 'bottom',
        text = element_text(size = 8)) +
  transition_reveal(Year)

figure1
animate(figure1,
        res = 300,
        width = 2000,
        height = 1125,
        renderer = gifski_renderer())
Kilocalories per Person per Day in Australia

Figure 1.1: Kilocalories per Person per Day in Australia

anim_save("figure1.gif")

2 Data Explanation

According to Figure1.1, we can see the different colours represent different food groups and the larger the area, the greater the proportion of the Australian diet. It is clear that they prefer cereals and grains, meat, fats and sugary foods to pulses and starchy roots.

3 Reference

According to the Australian government’s dietary guidelines, these unhealthy diets have led to many Australian adults and about a quarter of children being overweight or obese, so it’s time to make changes for the sake of our health Grech, Rangan, and Allman-Farinelli (2018).

# read world map data
world <- readOGR(dsn = 'World_Countries_(Generalized)/.')
OGR data source with driver: ESRI Shapefile 
Source: "/Users/gu1gu1/Desktop/assignment4-group1/World_Countries_(Generalized)", layer: "World_Countries__Generalized_"
with 249 features
It has 7 fields
world_shp <- world %>% 
  st_as_sf()
world_data <- daily_caloric_supply %>%
  select(Entity, Year, `Calories from animal protein `) %>%
  merge(world_shp,
        by.x = "Entity",
        by.y = "COUNTRY") %>% 
  st_as_sf()

# data figure2
figure2 <- world_data %>%
  ggplot(aes(fill = `Calories from animal protein `)) +
  geom_sf(colour = NA) +
  labs(x = 'Longitude',
       y = 'Latitude',
       title = '  Year: {closest_state}') +
  scale_fill_viridis_c(na.value = 'grey') +
  theme_void() +
  theme(legend.position = 'bottom') +
  transition_states(states = Year)
figure2
animate(figure2,
        res = 300,
        width = 2000,
        height = 1125)
Year: {closest_state}

Figure 3.1: Year: {closest_state}

anim_save('figure2.gif')

4 Data Explanation

To better represent the variation in calories provided by animal protein around the world, I first downloaded data from a world map online, then matched it to the dataset I chose by country name, then filled in the colours according to the calories provided by animal protein to create Figure3.1. The closer the color is to green, the more calories from animal protein, and conversely the closer the color is to blue the less there is. Overall, the amount of calories from animal protein has increased worldwide.

5 Reference

Observing the Figure3.1 we can find that people living in North America, Oceania, and Europe consume more animal protein to provide calories, simply put, their diet composition prefers meat products, but also from the side to reflect the continued high consumption of livestock products in almost all developed countries Stoll-Kleemann and O’Riordan (2015).

# data wrangling
by_entity <- overweight_calories %>% 
  rename('caloric_supply' = 'Daily caloric supply (OWID based on UN FAO & historical sources)',
         'Overweight' = 'Overweight or Obese (NCDRisC (2017))') %>% 
  select(Entity, Year, caloric_supply, Overweight) %>%
  filter(Entity %in% c("Australia", "China",
                       "United States", "United Kingdom",
                       "South Africa", "Brazil"),
         Year >= 2000) %>%
  drop_na()
# linear models
by_entity2 <- by_entity %>%
  group_by(Entity)%>%
  nest()

fit_lm <- function(x){
  lm(Overweight~caloric_supply, data = x)
}
mapped_lm <- map(by_entity$data, fit_lm)
scatter_plot <-
  ggplot(data = by_entity,
       aes(x = caloric_supply, 
           y = Overweight)) + 
         geom_point(alpha = 0.4) +
  facet_wrap(~Entity, scales = "free") +
  geom_smooth(method = "lm")
scatter_plot

entity_model <- by_entity2 %>% 
                    mutate(model = map(data, function(x){
                      lm(Overweight~caloric_supply, data = x)
                      })
                      )

entity_model %>%
  mutate(tidy = map(model, tidy))
# A tibble: 6 × 4
# Groups:   Entity [6]
  Entity         data              model  tidy            
  <chr>          <list>            <list> <list>          
1 Australia      <tibble [15 × 3]> <lm>   <tibble [2 × 5]>
2 Brazil         <tibble [15 × 3]> <lm>   <tibble [2 × 5]>
3 China          <tibble [15 × 3]> <lm>   <tibble [2 × 5]>
4 South Africa   <tibble [15 × 3]> <lm>   <tibble [2 × 5]>
5 United Kingdom <tibble [15 × 3]> <lm>   <tibble [2 × 5]>
6 United States  <tibble [15 × 3]> <lm>   <tibble [2 × 5]>
entity_coefs <- entity_model %>%
                    mutate(tidy = map(model, tidy)) %>%
                    unnest(tidy) %>%
                    select(Entity, term, estimate)
tidy_entity_coefs <- entity_coefs %>%
                          pivot_wider(id_cols = c(Entity), 
                                      names_from =  term,
                                      values_from = estimate) %>%
                          rename(Intercept = `(Intercept)`,
                                 Slope = caloric_supply)
entity_glance <- entity_model %>% 
  mutate(glance = map(model, glance)) %>%
  unnest(glance) %>%
  select(Entity, r.squared, AIC, BIC)
Table1 = entity_glance
knitr::kable(Table1, booktabs = TRUE, "html",
             caption = "Goodness_of_fit_measures") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Table 5.1: Goodness_of_fit_measures
Entity r.squared AIC BIC
Australia 0.9079612 40.73470 42.85885
Brazil 0.9232400 48.68766 50.81181
China 0.9727043 46.42461 48.54876
South Africa 0.7375348 66.86584 68.98999
United Kingdom 0.0040535 77.89588 80.02003
United States 0.3628869 69.38632 71.51047

6 Data Explanation

The scatter plot shows that the Chinese fitted linear model is the best, while, according to Table5.1, we can find that Chinese linear model has a maximum r.squared value around 0.97 and there are relatively small AIC and BIC values of around 46.42 and 48.55 respectively. Therefore, we can find that Chinese people’s overweight or obese are more affected by calorie intake.

7 Reference

R-squared is the percentage of outcome variable variation explained by the model, and describes how close the data are to the fitted regression. In general, the higher the R-squared value, the better the model fits. AIC and BIC both aim at achieving a compromise between model goodness of fit and model complexity. The preferred models are those with minimum AIC/BIC (Yang and Berdine (2015)).

Grech, Amanda, Anna Rangan, and Margaret Allman-Farinelli. 2018. “Macronutrient Composition of the Australian Population’s Diet; Trends from Three National Nutrition Surveys 1983, 1995 and 2012.” Nutrients 10 (8): 1045.
Stoll-Kleemann, Susanne, and Tim O’Riordan. 2015. “The Sustainability Challenges of Our Meat and Dairy Diets.” Environment: Science and Policy for Sustainable Development 57 (3): 34–48.
Yang, Shengping, and Gilbert Berdine. 2015. “Model Selection and Model over-Fitting.” The Southwest Respiratory and Critical Care Chronicles 3 (12): 52–55.